In [1]:
# data manipulation
import numpy as np
import pandas as pd
# decomposition
from sklearn.decomposition import PCA
# encoding labels
from sklearn.preprocessing import LabelEncoder
# scaler
from sklearn.preprocessing import StandardScaler

In [2]:
# Names of the datasets to analyse. Each name is used both as the sub-directory
# of the processed-data folder to load from and as the key of the per-dataset dicts.
ds = ['emodb', 'emovo', 'ravdess']

In [3]:
# Per-dataset metadata table (one row per audio chunk), keyed by dataset name.
# Alternative local (Windows) location of the same files:
#   D:\Scuola\TesiMag\thesis_adversarial_ml_audio\processed_data/{d}/metadata_pre_aug.csv
md = {
    d: pd.read_csv(f"/datasets/nicolas_facchinetti/processed_data/{d}/metadata_pre_aug.csv")
    for d in ds
}

In [4]:
md['emodb']

Unnamed: 0,path,filename,chunk,label,actor,gender
0,/datasets/nicolas_facchinetti/emodb/data/13a05...,13a05Tc.wav,0,sad,13,f
1,/datasets/nicolas_facchinetti/emodb/data/13a05...,13a05Tc.wav,1,sad,13,f
2,/datasets/nicolas_facchinetti/emodb/data/14b09...,14b09Ac.wav,0,fear,14,f
3,/datasets/nicolas_facchinetti/emodb/data/11b02...,11b02Na.wav,0,neutral,11,m
4,/datasets/nicolas_facchinetti/emodb/data/09b03...,09b03Fd.wav,0,happy,9,f
...,...,...,...,...,...,...
675,/datasets/nicolas_facchinetti/emodb/data/11b03...,11b03Td.wav,1,sad,11,m
676,/datasets/nicolas_facchinetti/emodb/data/12b02...,12b02Na.wav,0,neutral,12,m
677,/datasets/nicolas_facchinetti/emodb/data/14b01...,14b01Eb.wav,0,disgust,14,f
678,/datasets/nicolas_facchinetti/emodb/data/14b01...,14b01Eb.wav,1,disgust,14,f


In [5]:
# Per-dataset spectrogram array, keyed by dataset name.
# Alternative local (Windows) location of the same files:
#   D:\Scuola\TesiMag\thesis_adversarial_ml_audio\processed_data/{d}/sgram_pre_aug.npy
sgrams = {
    d: np.load(f"/datasets/nicolas_facchinetti/processed_data/{d}/sgram_pre_aug.npy")
    for d in ds
}

# Sanity check: shape of the stacked EmoDB array.
np.shape(sgrams['emodb'])

(680, 128, 261)

PCA expects tabular input (one row per sample), so flatten each sample's spectrogram matrix into a single row. Also standardize the data (each dataset separately).

In [6]:
for d in ds:
    # Collapse every sample's matrix into one row in a single reshape
    # (row-major order, same layout as calling .flatten() on each sample).
    n_samples = len(sgrams[d])
    flat = np.reshape(sgrams[d], (n_samples, -1))
    # A fresh scaler per dataset: each dataset is standardized on its own statistics.
    scaler = StandardScaler()
    sgrams[d] = scaler.fit_transform(flat)

Extract principal components

In [7]:
def get_components(logmelspectrograms, n_components=0.9):
    """Fit a PCA on the samples and return them projected onto the kept components.

    Parameters
    ----------
    logmelspectrograms : array-like of shape (n_samples, n_features)
        One flattened sample per row.
    n_components : float or int, default=0.9
        Forwarded unchanged to ``sklearn.decomposition.PCA``. A float strictly
        between 0 and 1 keeps the smallest number of components whose cumulative
        explained variance exceeds that fraction; an int keeps exactly that
        many components. The default reproduces the previous hard-coded 90%.

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_kept_components)
        The samples expressed in the principal-component basis.

    Side effects
    ------------
    Prints the number of components that were kept.
    """
    # apply PCA to data
    pca = PCA(n_components)
    principalComponents = pca.fit_transform(logmelspectrograms)
    print(f"Number of extracted components {pca.n_components_}")
    return principalComponents

In [8]:
# Principal-component representation of every dataset, keyed by dataset name.
pc = dict.fromkeys(ds)
for d in pc:
    # Announce the dataset first so the component count printed by
    # get_components can be attributed to it.
    print(f'For {d}')
    pc[d] = get_components(sgrams[d])

For emodb
Number of extracted components 256
For emovo
Number of extracted components 168
For ravdess
Number of extracted components 156


In [9]:
pd.DataFrame(pc['emodb'])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,246,247,248,249,250,251,252,253,254,255
0,20.388610,29.089204,-33.917645,-18.569226,6.067989,-8.882931,-0.688861,8.827057,8.848717,-14.225419,...,-0.100125,3.857443,-1.189150,-5.210749,-2.557228,8.787890,3.005838,0.038363,1.712366,-1.744635
1,69.059331,29.533312,-54.511986,-32.754742,-0.019496,-12.639450,7.753554,16.425557,9.483744,-14.275212,...,-0.771671,-6.589970,2.116073,-3.195187,-0.763356,1.509613,2.702545,0.592417,-4.084855,4.046255
2,-83.486472,-4.738516,6.170955,17.532081,6.809880,-3.472392,-5.603520,-6.854469,-3.514991,15.921548,...,-3.496690,-0.055174,6.359514,3.998968,-2.351081,-6.551988,0.131609,0.479675,3.674968,0.638430
3,80.825051,34.912518,32.316570,-14.833888,41.637621,4.024758,-38.799674,9.385691,-29.166832,-17.015674,...,-2.330586,-4.338018,0.991366,-5.451828,-3.425486,-4.906212,2.585623,0.949577,-3.401142,-4.707707
4,-111.807459,-25.951806,-1.333098,-10.225136,5.897344,-8.074732,-4.138928,5.778704,5.321723,6.479432,...,-0.341178,-1.631303,-9.266901,7.619178,1.998305,-4.741072,5.342593,-4.222276,6.803871,-3.800254
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
675,131.155368,12.696413,1.447879,-27.277145,-42.352139,-0.036546,19.635086,-2.170521,28.372700,-4.617190,...,-2.774763,1.602340,2.661717,2.556510,0.972311,-1.182181,2.508410,0.689024,0.504173,4.705253
676,39.655215,8.761926,42.967994,9.808666,34.281017,27.476781,-38.222569,23.414401,13.982044,8.633630,...,-1.242589,1.947879,-8.258554,-3.484053,0.341690,-3.750518,0.480906,-6.272596,-5.274757,4.411979
677,-13.902353,2.388572,0.731893,15.937000,-0.326478,1.920340,-12.609373,-9.248249,18.015236,-11.685856,...,2.061166,0.251076,0.993828,-0.248513,-8.392963,-2.390307,1.917414,-3.149088,4.157119,2.803557
678,619.125264,22.241687,3.818788,-8.224830,-6.785167,-5.986959,-13.270448,-8.089205,10.746739,9.656692,...,1.798618,1.001811,-1.267190,0.221679,3.361815,1.133309,2.061737,1.427608,-0.242549,-0.518544


In [10]:
pc['emodb'].shape

(680, 256)

In [11]:
md['emodb']

Unnamed: 0,path,filename,chunk,label,actor,gender
0,/datasets/nicolas_facchinetti/emodb/data/13a05...,13a05Tc.wav,0,sad,13,f
1,/datasets/nicolas_facchinetti/emodb/data/13a05...,13a05Tc.wav,1,sad,13,f
2,/datasets/nicolas_facchinetti/emodb/data/14b09...,14b09Ac.wav,0,fear,14,f
3,/datasets/nicolas_facchinetti/emodb/data/11b02...,11b02Na.wav,0,neutral,11,m
4,/datasets/nicolas_facchinetti/emodb/data/09b03...,09b03Fd.wav,0,happy,9,f
...,...,...,...,...,...,...
675,/datasets/nicolas_facchinetti/emodb/data/11b03...,11b03Td.wav,1,sad,11,m
676,/datasets/nicolas_facchinetti/emodb/data/12b02...,12b02Na.wav,0,neutral,12,m
677,/datasets/nicolas_facchinetti/emodb/data/14b01...,14b01Eb.wav,0,disgust,14,f
678,/datasets/nicolas_facchinetti/emodb/data/14b01...,14b01Eb.wav,1,disgust,14,f


In [12]:
# Metadata rows of the EmoDB chunks labelled 'sad' (original index labels are kept).
is_sad = md['emodb']['label'] == 'sad'
a = md['emodb'].loc[is_sad]
a

Unnamed: 0,path,filename,chunk,label,actor,gender
0,/datasets/nicolas_facchinetti/emodb/data/13a05...,13a05Tc.wav,0,sad,13,f
1,/datasets/nicolas_facchinetti/emodb/data/13a05...,13a05Tc.wav,1,sad,13,f
10,/datasets/nicolas_facchinetti/emodb/data/03b02...,03b02Tb.wav,0,sad,3,m
11,/datasets/nicolas_facchinetti/emodb/data/03b02...,03b02Tb.wav,1,sad,3,m
28,/datasets/nicolas_facchinetti/emodb/data/14b02...,14b02Tc.wav,0,sad,14,f
...,...,...,...,...,...,...
652,/datasets/nicolas_facchinetti/emodb/data/16a04...,16a04Tc.wav,0,sad,16,f
662,/datasets/nicolas_facchinetti/emodb/data/16b01...,16b01Tb.wav,0,sad,16,f
663,/datasets/nicolas_facchinetti/emodb/data/16b01...,16b01Tb.wav,1,sad,16,f
674,/datasets/nicolas_facchinetti/emodb/data/11b03...,11b03Td.wav,0,sad,11,m


In [13]:
# Metadata rows of the EmoDB chunks labelled 'fear' (original index labels are kept).
is_fear = md['emodb']['label'] == 'fear'
b = md['emodb'].loc[is_fear]
b

Unnamed: 0,path,filename,chunk,label,actor,gender
2,/datasets/nicolas_facchinetti/emodb/data/14b09...,14b09Ac.wav,0,fear,14,f
43,/datasets/nicolas_facchinetti/emodb/data/03b10...,03b10Ab.wav,0,fear,3,m
46,/datasets/nicolas_facchinetti/emodb/data/09b10...,09b10Aa.wav,0,fear,9,f
47,/datasets/nicolas_facchinetti/emodb/data/08a01...,08a01Ab.wav,0,fear,8,f
57,/datasets/nicolas_facchinetti/emodb/data/11a04...,11a04Ac.wav,0,fear,11,m
...,...,...,...,...,...,...
602,/datasets/nicolas_facchinetti/emodb/data/10b09...,10b09Ad.wav,0,fear,10,m
611,/datasets/nicolas_facchinetti/emodb/data/13a02...,13a02Ad.wav,0,fear,13,f
616,/datasets/nicolas_facchinetti/emodb/data/14a01...,14a01Aa.wav,0,fear,14,f
648,/datasets/nicolas_facchinetti/emodb/data/14b10...,14b10Ad.wav,0,fear,14,f


In [14]:
# Pick the principal-component rows belonging to each label subset.
# The metadata index labels are used here as row positions in the NumPy array;
# this lines up because the metadata was read without an index column and so
# carries the default 0..n-1 index, which the boolean filtering preserves.
a_data = pc['emodb'][a.index]
b_data = pc['emodb'][b.index]

In [15]:
b_data.shape

(72, 256)

In [16]:
b_data[0].shape

(256,)

NumPy cross-correlation between one sample of each class

In [17]:
# Both inputs are 1-D vectors of the same length, so with np.correlate's default
# mode ('valid') there is a single overlap position: the result is a one-element
# array holding the (unnormalized) dot product of the two vectors.
np.correlate(a_data[0], b_data[0])

array([-2256.25799393])

In [18]:
# Distinct emotion labels of EmoDB, listed in order of first appearance in the metadata.
md['emodb'].label.unique()

array(['sad', 'fear', 'neutral', 'happy', 'bored', 'angry', 'disgust'],
      dtype=object)

For every pair of labels, compute the cross-correlation between each sample of the first label and each sample of the second, then average over all those sample pairs

In [19]:
def mean_pairwise_dot(x, y):
    """Return the mean of ``np.correlate(s1, s2)`` over all pairs (s1 in x, s2 in y).

    Parameters
    ----------
    x, y : numpy.ndarray of shape (n_x, n_features) and (n_y, n_features)
        One sample per row; both must have the same number of columns.

    Returns
    -------
    numpy.float64
        Average unnormalized cross-correlation between the two groups.

    Notes
    -----
    For two equal-length 1-D vectors ``np.correlate`` (default ``mode='valid'``)
    yields a single value: their dot product. The mean of all pairwise dot
    products equals the dot product of the two row-means, so the O(n_x * n_y)
    Python loop over sample pairs is unnecessary.
    NOTE(review): this quantity is not normalized (it is not a correlation
    coefficient), so magnitudes are not comparable across datasets.
    """
    return np.mean(x, axis=0) @ np.mean(y, axis=0)


# corr[d] is a list of rows (one per label, in order of first appearance);
# entry [i][j] is the average cross-correlation between labels i and j.
corr = {}

for d in ds:
    print(d)
    labels = md[d].label.unique()

    # Select each label's samples once, with a positional boolean mask so the
    # selection does not depend on the metadata index matching array positions.
    samples_by_label = {
        label: pc[d][(md[d]['label'] == label).to_numpy()]
        for label in labels
    }

    corr[d] = [
        [mean_pairwise_dot(samples_by_label[l1], samples_by_label[l2]) for l2 in labels]
        for l1 in labels
    ]

emodb
emovo
ravdess


In [20]:
# Wrap each dataset's nested list in a labelled square table:
# rows and columns are both the dataset's labels, in order of first appearance.
corr_dfs = {}

for name in ds:
    classes = md[name].label.unique()
    corr_dfs[name] = pd.DataFrame(data=corr[name], index=classes, columns=classes)

In [21]:
corr_dfs['emodb']

Unnamed: 0,sad,fear,neutral,happy,bored,angry,disgust
sad,1932.551042,-1431.216431,103.387519,-788.835933,1048.978236,-947.501163,-418.209135
fear,-1431.216431,2375.477027,230.783169,334.122339,-1144.011025,298.400096,301.993822
neutral,103.387519,230.783169,815.050001,-438.95906,462.241119,-739.671808,-8.123623
happy,-788.835933,334.122339,-438.95906,668.710275,-668.395799,733.05374,128.28915
bored,1048.978236,-1144.011025,462.241119,-668.395799,1260.47997,-948.001294,-188.463199
angry,-947.501163,298.400096,-739.671808,733.05374,-948.001294,1237.81247,53.989056
disgust,-418.209135,301.993822,-8.123623,128.28915,-188.463199,53.989056,411.732054


In [22]:
corr_dfs['emovo']

Unnamed: 0,happy,fear,sad,surprised,disgust,neutral,angry
happy,5063.641937,-1046.393291,-8585.163721,2215.123592,-1394.340834,-1563.070839,6632.76373
fear,-1046.393291,321.056129,1782.254619,-441.975941,275.550964,271.674327,-1445.650186
sad,-8585.163721,1782.254619,14764.225273,-3817.63945,2354.164059,2682.922468,-11443.646894
surprised,2215.123592,-441.975941,-3817.63945,1071.260637,-624.946329,-710.90913,2889.094275
disgust,-1394.340834,275.550964,2354.164059,-624.946329,475.001415,433.262531,-1894.484502
neutral,-1563.070839,271.674327,2682.922468,-710.90913,433.262531,712.306094,-2228.260107
angry,6632.76373,-1445.650186,-11443.646894,2889.094275,-1894.484502,-2228.260107,9262.706452


In [23]:
corr_dfs['ravdess']

Unnamed: 0,angry,surprised,disgust,sad,happy,neutral,fear,calm
angry,31724.201232,2427.780734,-6353.87125,-13583.994073,9428.828267,-21919.59415,13622.474216,-26251.388207
surprised,2427.780734,322.243211,-564.830603,-1078.765452,798.533169,-1689.470877,1079.844706,-2132.262075
disgust,-6353.87125,-564.830603,1399.203254,2748.241017,-1969.428398,4328.336323,-2764.989068,5326.181754
sad,-13583.994073,-1078.765452,2748.241017,5878.348994,-4080.34091,9345.317684,-5848.890713,11267.590223
happy,9428.828267,798.533169,-1969.428398,-4080.34091,2920.886583,-6485.079913,4065.862929,-7902.103324
neutral,-21919.59415,-1689.470877,4328.336323,9345.317684,-6485.079913,15415.134696,-9495.289767,18176.334057
fear,13622.474216,1079.844706,-2764.989068,-5848.890713,4065.862929,-9495.289767,5966.988293,-11351.728316
calm,-26251.388207,-2132.262075,5326.181754,11267.590223,-7902.103324,18176.334057,-11351.728316,21908.656509


Set the diagonal elements to zero, since we are only interested in the correlations with other classes

In [24]:
for d in ds:
    # Zero the diagonal on an explicit copy and rebuild the frame. Writing
    # through `DataFrame.values` only works when it happens to be a writable
    # view of the frame's data; with pandas Copy-on-Write enabled the array is
    # read-only and np.fill_diagonal raises.
    off_diagonal = corr_dfs[d].to_numpy(copy=True)
    np.fill_diagonal(off_diagonal, 0)
    corr_dfs[d] = pd.DataFrame(
        off_diagonal, index=corr_dfs[d].index, columns=corr_dfs[d].columns
    )

In [25]:
corr_dfs['emodb']

Unnamed: 0,sad,fear,neutral,happy,bored,angry,disgust
sad,0.0,-1431.216431,103.387519,-788.835933,1048.978236,-947.501163,-418.209135
fear,-1431.216431,0.0,230.783169,334.122339,-1144.011025,298.400096,301.993822
neutral,103.387519,230.783169,0.0,-438.95906,462.241119,-739.671808,-8.123623
happy,-788.835933,334.122339,-438.95906,0.0,-668.395799,733.05374,128.28915
bored,1048.978236,-1144.011025,462.241119,-668.395799,0.0,-948.001294,-188.463199
angry,-947.501163,298.400096,-739.671808,733.05374,-948.001294,0.0,53.989056
disgust,-418.209135,301.993822,-8.123623,128.28915,-188.463199,53.989056,0.0


In [26]:
corr_dfs['emovo']

Unnamed: 0,happy,fear,sad,surprised,disgust,neutral,angry
happy,0.0,-1046.393291,-8585.163721,2215.123592,-1394.340834,-1563.070839,6632.76373
fear,-1046.393291,0.0,1782.254619,-441.975941,275.550964,271.674327,-1445.650186
sad,-8585.163721,1782.254619,0.0,-3817.63945,2354.164059,2682.922468,-11443.646894
surprised,2215.123592,-441.975941,-3817.63945,0.0,-624.946329,-710.90913,2889.094275
disgust,-1394.340834,275.550964,2354.164059,-624.946329,0.0,433.262531,-1894.484502
neutral,-1563.070839,271.674327,2682.922468,-710.90913,433.262531,0.0,-2228.260107
angry,6632.76373,-1445.650186,-11443.646894,2889.094275,-1894.484502,-2228.260107,0.0


In [27]:
corr_dfs['ravdess']

Unnamed: 0,angry,surprised,disgust,sad,happy,neutral,fear,calm
angry,0.0,2427.780734,-6353.87125,-13583.994073,9428.828267,-21919.59415,13622.474216,-26251.388207
surprised,2427.780734,0.0,-564.830603,-1078.765452,798.533169,-1689.470877,1079.844706,-2132.262075
disgust,-6353.87125,-564.830603,0.0,2748.241017,-1969.428398,4328.336323,-2764.989068,5326.181754
sad,-13583.994073,-1078.765452,2748.241017,0.0,-4080.34091,9345.317684,-5848.890713,11267.590223
happy,9428.828267,798.533169,-1969.428398,-4080.34091,0.0,-6485.079913,4065.862929,-7902.103324
neutral,-21919.59415,-1689.470877,4328.336323,9345.317684,-6485.079913,0.0,-9495.289767,18176.334057
fear,13622.474216,1079.844706,-2764.989068,-5848.890713,4065.862929,-9495.289767,0.0,-11351.728316
calm,-26251.388207,-2132.262075,5326.181754,11267.590223,-7902.103324,18176.334057,-11351.728316,0.0


In [28]:
# Column-wise sum of the signed values (diagonal already zeroed): positive and
# negative correlations with the other classes cancel each other out here.
corr_dfs['emodb'].sum()

sad       -2433.396907
fear      -1409.928029
neutral    -390.342684
happy      -700.725562
bored     -1437.651961
angry     -1549.731372
disgust    -130.523929
dtype: float64

Mean of the absolute values of the correlations with the other classes

In [29]:
# Per class: average magnitude of its correlation with every other class.
mean_corr = {}
for name in ds:
    table = corr_dfs[name]
    # The zeroed diagonal adds nothing to the sum, so average over the
    # remaining (number of classes - 1) entries of each column.
    n_other_classes = len(table) - 1
    mean_corr[name] = table.abs().sum() / n_other_classes

In [30]:
mean_corr['emodb'].sort_values()

disgust    183.177998
neutral    330.527716
happy      515.276004
angry      620.102860
fear       623.421147
bored      743.348445
sad        789.688069
dtype: float64

In [31]:
mean_corr['emovo'].sort_values()

fear          877.249888
disgust      1162.791537
neutral      1315.016567
surprised    1783.281453
happy        3572.809335
angry        4422.316616
sad          5110.965202
dtype: float64

In [32]:
mean_corr['ravdess'].sort_values()

surprised     1395.926802
disgust       3436.554059
happy         4961.453844
sad           6850.448582
fear          6889.868531
neutral      10205.631824
calm         11772.512565
angry        13369.704414
dtype: float64