In [1]:
# data manipulation
import numpy as np
import pandas as pd
# decomposition
from sklearn.decomposition import PCA
# encoding labels
from sklearn.preprocessing import LabelEncoder
# scaler
from sklearn.preprocessing import StandardScaler

# Load data

In [2]:
# names of the datasets handled in this notebook; each name is used both as a
# dictionary key and as the directory component of the data file paths
ds = ['emodb', 'emovo', 'ravdess']

In [3]:
# pre-augmentation metadata table of every dataset, keyed by dataset name
# (for a local run, point the path prefix at the local processed_data directory)
md = {
    d: pd.read_csv(f"/datasets/nicolas_facchinetti/processed_data/{d}/metadata_pre_aug.csv")
    for d in ds
}

In [4]:
md['emodb']

Unnamed: 0,path,filename,chunk,label,actor,gender
0,/datasets/nicolas_facchinetti/emodb/data/13a05...,13a05Tc.wav,0,sad,13,f
1,/datasets/nicolas_facchinetti/emodb/data/13a05...,13a05Tc.wav,1,sad,13,f
2,/datasets/nicolas_facchinetti/emodb/data/14b09...,14b09Ac.wav,0,fear,14,f
3,/datasets/nicolas_facchinetti/emodb/data/11b02...,11b02Na.wav,0,neutral,11,m
4,/datasets/nicolas_facchinetti/emodb/data/09b03...,09b03Fd.wav,0,happy,9,f
...,...,...,...,...,...,...
675,/datasets/nicolas_facchinetti/emodb/data/11b03...,11b03Td.wav,1,sad,11,m
676,/datasets/nicolas_facchinetti/emodb/data/12b02...,12b02Na.wav,0,neutral,12,m
677,/datasets/nicolas_facchinetti/emodb/data/14b01...,14b01Eb.wav,0,disgust,14,f
678,/datasets/nicolas_facchinetti/emodb/data/14b01...,14b01Eb.wav,1,disgust,14,f


In [5]:
# pre-augmentation spectrogram array of every dataset, keyed by dataset name
sgrams = {
    d: np.load(f"/datasets/nicolas_facchinetti/processed_data/{d}/sgram_pre_aug.npy")
    for d in ds
}

# shape of the emodb array
np.shape(sgrams['emodb'])

(680, 128, 261)

PCA expects tabular data (one row per sample), so flatten each sample's matrix into a single row. Also standardize the data.

In [6]:
for d in ds:
    # one row per sample: collapse every axis after the first into a single feature axis
    flat_samples = sgrams[d].reshape(len(sgrams[d]), -1)
    # standardize every feature, fitting a separate scaler on each dataset
    scaler = StandardScaler()
    sgrams[d] = scaler.fit_transform(flat_samples)

# Extract principal components

In [7]:
def get_components(logmelspectrograms, explained_variance=0.9):
    """Project the samples onto their principal components.

    Parameters
    ----------
    logmelspectrograms : array-like of shape (n_samples, n_features)
        One flattened sample per row.
    explained_variance : float, default=0.9
        Forwarded to ``PCA`` as ``n_components``. With a value between 0 and 1
        PCA keeps the smallest number of components whose cumulative explained
        variance exceeds that fraction. The default reproduces the previously
        hard-coded value.

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_components_)
        The samples expressed in the principal-component basis.

    Notes
    -----
    Prints the number of components that were retained.
    """
    # a new PCA is fitted on every call, so the basis is specific to the input
    pca = PCA(n_components=explained_variance)
    principalComponents = pca.fit_transform(logmelspectrograms)
    print(f"Number of extracted components {pca.n_components_}")
    return principalComponents

In [8]:
# principal components of every dataset, keyed by dataset name
pc = {}
for d in ds:
    print(f'For {d}')
    # get_components fits a separate PCA per call, so the number of retained
    # components differs from one dataset to the next
    pc[d] = get_components(sgrams[d])

For emodb
Number of extracted components 256
For emovo
Number of extracted components 168
For ravdess
Number of extracted components 156


In [9]:
pd.DataFrame(pc['emodb'])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,246,247,248,249,250,251,252,253,254,255
0,20.388610,29.089204,-33.917645,-18.569226,6.067989,-8.882931,-0.688861,8.827057,8.848717,-14.225419,...,-0.100125,3.857443,-1.189150,-5.210749,-2.557228,8.787890,3.005838,0.038363,1.712366,-1.744635
1,69.059331,29.533312,-54.511986,-32.754742,-0.019496,-12.639450,7.753554,16.425557,9.483744,-14.275212,...,-0.771671,-6.589970,2.116073,-3.195187,-0.763356,1.509613,2.702545,0.592417,-4.084855,4.046255
2,-83.486472,-4.738516,6.170955,17.532081,6.809880,-3.472392,-5.603520,-6.854469,-3.514991,15.921548,...,-3.496690,-0.055174,6.359514,3.998968,-2.351081,-6.551988,0.131609,0.479675,3.674968,0.638430
3,80.825051,34.912518,32.316570,-14.833888,41.637621,4.024758,-38.799674,9.385691,-29.166832,-17.015674,...,-2.330586,-4.338018,0.991366,-5.451828,-3.425486,-4.906212,2.585623,0.949577,-3.401142,-4.707707
4,-111.807459,-25.951806,-1.333098,-10.225136,5.897344,-8.074732,-4.138928,5.778704,5.321723,6.479432,...,-0.341178,-1.631303,-9.266901,7.619178,1.998305,-4.741072,5.342593,-4.222276,6.803871,-3.800254
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
675,131.155368,12.696413,1.447879,-27.277145,-42.352139,-0.036546,19.635086,-2.170521,28.372700,-4.617190,...,-2.774763,1.602340,2.661717,2.556510,0.972311,-1.182181,2.508410,0.689024,0.504173,4.705253
676,39.655215,8.761926,42.967994,9.808666,34.281017,27.476781,-38.222569,23.414401,13.982044,8.633630,...,-1.242589,1.947879,-8.258554,-3.484053,0.341690,-3.750518,0.480906,-6.272596,-5.274757,4.411979
677,-13.902353,2.388572,0.731893,15.937000,-0.326478,1.920340,-12.609373,-9.248249,18.015236,-11.685856,...,2.061166,0.251076,0.993828,-0.248513,-8.392963,-2.390307,1.917414,-3.149088,4.157119,2.803557
678,619.125264,22.241687,3.818788,-8.224830,-6.785167,-5.986959,-13.270448,-8.089205,10.746739,9.656692,...,1.798618,1.001811,-1.267190,0.221679,3.361815,1.133309,2.061737,1.427608,-0.242549,-0.518544


In [10]:
pc['emodb'].shape

(680, 256)

# Example correlation analysis

In [11]:
md['emodb'].head(5)

Unnamed: 0,path,filename,chunk,label,actor,gender
0,/datasets/nicolas_facchinetti/emodb/data/13a05...,13a05Tc.wav,0,sad,13,f
1,/datasets/nicolas_facchinetti/emodb/data/13a05...,13a05Tc.wav,1,sad,13,f
2,/datasets/nicolas_facchinetti/emodb/data/14b09...,14b09Ac.wav,0,fear,14,f
3,/datasets/nicolas_facchinetti/emodb/data/11b02...,11b02Na.wav,0,neutral,11,m
4,/datasets/nicolas_facchinetti/emodb/data/09b03...,09b03Fd.wav,0,happy,9,f


In [12]:
# emodb metadata rows labelled 'sad' (original row index is preserved)
a = md['emodb'].loc[md['emodb']['label'].eq('sad')]
a

Unnamed: 0,path,filename,chunk,label,actor,gender
0,/datasets/nicolas_facchinetti/emodb/data/13a05...,13a05Tc.wav,0,sad,13,f
1,/datasets/nicolas_facchinetti/emodb/data/13a05...,13a05Tc.wav,1,sad,13,f
10,/datasets/nicolas_facchinetti/emodb/data/03b02...,03b02Tb.wav,0,sad,3,m
11,/datasets/nicolas_facchinetti/emodb/data/03b02...,03b02Tb.wav,1,sad,3,m
28,/datasets/nicolas_facchinetti/emodb/data/14b02...,14b02Tc.wav,0,sad,14,f
...,...,...,...,...,...,...
652,/datasets/nicolas_facchinetti/emodb/data/16a04...,16a04Tc.wav,0,sad,16,f
662,/datasets/nicolas_facchinetti/emodb/data/16b01...,16b01Tb.wav,0,sad,16,f
663,/datasets/nicolas_facchinetti/emodb/data/16b01...,16b01Tb.wav,1,sad,16,f
674,/datasets/nicolas_facchinetti/emodb/data/11b03...,11b03Td.wav,0,sad,11,m


In [13]:
# emodb metadata rows labelled 'fear' (original row index is preserved)
b = md['emodb'].loc[md['emodb']['label'].eq('fear')]
b

Unnamed: 0,path,filename,chunk,label,actor,gender
2,/datasets/nicolas_facchinetti/emodb/data/14b09...,14b09Ac.wav,0,fear,14,f
43,/datasets/nicolas_facchinetti/emodb/data/03b10...,03b10Ab.wav,0,fear,3,m
46,/datasets/nicolas_facchinetti/emodb/data/09b10...,09b10Aa.wav,0,fear,9,f
47,/datasets/nicolas_facchinetti/emodb/data/08a01...,08a01Ab.wav,0,fear,8,f
57,/datasets/nicolas_facchinetti/emodb/data/11a04...,11a04Ac.wav,0,fear,11,m
...,...,...,...,...,...,...
602,/datasets/nicolas_facchinetti/emodb/data/10b09...,10b09Ad.wav,0,fear,10,m
611,/datasets/nicolas_facchinetti/emodb/data/13a02...,13a02Ad.wav,0,fear,13,f
616,/datasets/nicolas_facchinetti/emodb/data/14a01...,14a01Aa.wav,0,fear,14,f
648,/datasets/nicolas_facchinetti/emodb/data/14b10...,14b10Ad.wav,0,fear,14,f


In [14]:
# the metadata index labels are used as row positions into the component matrix
a_data, b_data = pc['emodb'][a.index], pc['emodb'][b.index]
(a_data.shape, b_data.shape)

((113, 256), (72, 256))

Compute Pearson coefficient between two samples

Same sample

In [15]:
np.corrcoef(a_data[0], a_data[0])

array([[1., 1.],
       [1., 1.]])

Same class, different samples

In [16]:
np.corrcoef(a_data[0], a_data[8])

array([[1.        , 0.11130695],
       [0.11130695, 1.        ]])

Different class

In [17]:
np.corrcoef(a_data[0], b_data[0])

array([[ 1.        , -0.17666328],
       [-0.17666328,  1.        ]])

In [18]:
np.corrcoef(a_data[0], b_data[0])[0][1]

-0.17666327604987936

Compute pairwise Pearson coefficient between samples of two classes

In [19]:
np.array([np.corrcoef(s1, s2)[0][1] for s1 in a_data for s2 in b_data])

array([-0.17666328, -0.05698032, -0.21321667, ..., -0.57928995,
       -0.58957727,  0.11565674])

In [20]:
np.array([np.corrcoef(s1, s2)[0][1] for s1 in a_data for s2 in b_data]).shape

(8136,)

In [21]:
def compute_corr(md, data):
    """Mean pairwise Pearson correlation between the samples of every label pair.

    Parameters
    ----------
    md : pandas.DataFrame
        Metadata with a ``label`` column. Its index labels are used as row
        positions into ``data``, so the index must keep the integer positions
        of the rows in ``data`` (rows may be missing, e.g. after dropping a
        label, but must not be renumbered).
    data : array-like of shape (n_samples, n_features)
        One sample per row.

    Returns
    -------
    pandas.DataFrame
        Square frame indexed and labelled by the unique labels (in order of
        first appearance). Cell ``(l1, l2)`` is the mean of the Pearson
        coefficients of all pairs ``(s1, s2)`` with ``s1`` in ``l1`` and ``s2``
        in ``l2``. Diagonal cells include every sample paired with itself.
    """
    data = np.asarray(data, dtype=float)
    labels = md['label'].unique()

    # Centre every sample and scale it to unit length, once per label: the
    # Pearson coefficient of two samples is then just their dot product, which
    # replaces one np.corrcoef call per sample pair with one matrix product
    # per label pair.
    unit_rows = {}
    for label in labels:
        positions = md.index[(md['label'] == label).to_numpy()].to_numpy()
        rows = data[positions]
        centered = rows - rows.mean(axis=1, keepdims=True)
        unit_rows[label] = centered / np.linalg.norm(centered, axis=1, keepdims=True)

    # clip to [-1, 1] as np.corrcoef does, to absorb floating-point overshoot
    corr_p = [
        [np.mean(np.clip(unit_rows[l1] @ unit_rows[l2].T, -1.0, 1.0)) for l2 in labels]
        for l1 in labels
    ]
    # create DataFrame with label names
    return pd.DataFrame(corr_p, columns=labels, index=labels)

In [22]:
def compute_corr2(md, data):
    """Mean absolute pairwise Pearson correlation between the samples of every label pair.

    Parameters
    ----------
    md : pandas.DataFrame
        Metadata with a ``label`` column. Its index labels are used as row
        positions into ``data``, so the index must keep the integer positions
        of the rows in ``data`` (rows may be missing, e.g. after dropping a
        label, but must not be renumbered).
    data : array-like of shape (n_samples, n_features)
        One sample per row.

    Returns
    -------
    pandas.DataFrame
        Square frame indexed and labelled by the unique labels (in order of
        first appearance). Cell ``(l1, l2)`` is the mean of the absolute
        Pearson coefficients of all pairs ``(s1, s2)`` with ``s1`` in ``l1``
        and ``s2`` in ``l2``. Diagonal cells include every sample paired with
        itself.
    """
    data = np.asarray(data, dtype=float)
    labels = md['label'].unique()

    # Centre every sample and scale it to unit length, once per label: the
    # Pearson coefficient of two samples is then just their dot product, which
    # replaces one np.corrcoef call per sample pair with one matrix product
    # per label pair.
    unit_rows = {}
    for label in labels:
        positions = md.index[(md['label'] == label).to_numpy()].to_numpy()
        rows = data[positions]
        centered = rows - rows.mean(axis=1, keepdims=True)
        unit_rows[label] = centered / np.linalg.norm(centered, axis=1, keepdims=True)

    # clip to [-1, 1] as np.corrcoef does, then average the magnitudes
    corr_p = [
        [
            np.mean(np.abs(np.clip(unit_rows[l1] @ unit_rows[l2].T, -1.0, 1.0)))
            for l2 in labels
        ]
        for l1 in labels
    ]
    # create DataFrame with label names
    return pd.DataFrame(corr_p, columns=labels, index=labels)

Without abs

In [23]:
compute_corr(md['emodb'], pc['emodb'])

Unnamed: 0,sad,fear,neutral,happy,bored,angry,disgust
sad,0.077511,-0.034478,0.00449,-0.049251,0.018384,-0.053962,-0.02763
fear,-0.034478,0.189647,0.079442,0.095729,0.043805,0.074339,0.097097
neutral,0.00449,0.079442,0.086877,0.017955,0.062002,-0.007633,0.04256
happy,-0.049251,0.095729,0.017955,0.083151,-0.001946,0.072037,0.055213
bored,0.018384,0.043805,0.062002,-0.001946,0.06135,-0.02377,0.024621
angry,-0.053962,0.074339,-0.007633,0.072037,-0.02377,0.083122,0.039777
disgust,-0.02763,0.097097,0.04256,0.055213,0.024621,0.039777,0.073368


With abs

In [24]:
compute_corr2(md['emodb'], pc['emodb'])

Unnamed: 0,sad,fear,neutral,happy,bored,angry,disgust
sad,0.304232,0.276351,0.286979,0.290847,0.312401,0.257056,0.280908
fear,0.276351,0.280585,0.276571,0.280633,0.294792,0.245926,0.271617
neutral,0.286979,0.276571,0.30748,0.287051,0.321602,0.25093,0.280919
happy,0.290847,0.280633,0.287051,0.307327,0.309254,0.266525,0.283117
bored,0.312401,0.294792,0.321602,0.309254,0.355578,0.272095,0.301589
angry,0.257056,0.245926,0.25093,0.266525,0.272095,0.242817,0.248091
disgust,0.280908,0.271617,0.280919,0.283117,0.301589,0.248091,0.287935


# Compute Pearson correlation between each pair of samples for each label

In [25]:
# first pass: label-by-label mean correlation table of every dataset,
# computed on the principal components and keyed by dataset name
corr1 = {}
for d in ds:
    corr1[d] = compute_corr(md[d], pc[d])
    print(f'Done {d}')

Done emodb
Done emovo
Done ravdess


In [26]:
corr1['emodb']

Unnamed: 0,sad,fear,neutral,happy,bored,angry,disgust
sad,0.077511,-0.034478,0.00449,-0.049251,0.018384,-0.053962,-0.02763
fear,-0.034478,0.189647,0.079442,0.095729,0.043805,0.074339,0.097097
neutral,0.00449,0.079442,0.086877,0.017955,0.062002,-0.007633,0.04256
happy,-0.049251,0.095729,0.017955,0.083151,-0.001946,0.072037,0.055213
bored,0.018384,0.043805,0.062002,-0.001946,0.06135,-0.02377,0.024621
angry,-0.053962,0.074339,-0.007633,0.072037,-0.02377,0.083122,0.039777
disgust,-0.02763,0.097097,0.04256,0.055213,0.024621,0.039777,0.073368


In [27]:
corr1['emovo']

Unnamed: 0,happy,fear,sad,surprised,disgust,neutral,angry
happy,0.130296,-0.031298,-0.187337,0.025507,-0.039277,-0.013427,0.161991
fear,-0.031298,0.013431,0.046718,-0.004932,0.009466,0.002195,-0.044654
sad,-0.187337,0.046718,0.28556,-0.036735,0.056517,0.025307,-0.250438
surprised,0.025507,-0.004932,-0.036735,0.010721,-0.008186,-0.00344,0.029611
disgust,-0.039277,0.009466,0.056517,-0.008186,0.017389,0.00359,-0.05361
neutral,-0.013427,0.002195,0.025307,-0.00344,0.00359,0.013051,-0.02623
angry,0.161991,-0.044654,-0.250438,0.029611,-0.05361,-0.02623,0.233166


In [28]:
corr1['ravdess']

Unnamed: 0,angry,surprised,disgust,sad,happy,neutral,fear,calm
angry,0.42707,0.054681,-0.142264,-0.299695,0.142771,-0.488983,0.175742,-0.548379
surprised,0.054681,0.019099,-0.024168,-0.039967,0.023697,-0.064334,0.026185,-0.076952
disgust,-0.142264,-0.024168,0.058756,0.101692,-0.053123,0.159442,-0.062729,0.186848
sad,-0.299695,-0.039967,0.101692,0.214016,-0.102245,0.342471,-0.125532,0.3866
happy,0.142771,0.023697,-0.053123,-0.102245,0.055722,-0.160648,0.060592,-0.187376
neutral,-0.488983,-0.064334,0.159442,0.342471,-0.160648,0.579079,-0.20575,0.634399
fear,0.175742,0.026185,-0.062729,-0.125532,0.060592,-0.20575,0.078595,-0.231367
calm,-0.548379,-0.076952,0.186848,0.3866,-0.187376,0.634399,-0.231367,0.715301


In [29]:
def mean_corr_classes(corr_df):
    """Mean absolute correlation of every label with all the other labels.

    Parameters
    ----------
    corr_df : pandas.DataFrame
        Square label-by-label correlation table. It is not modified.

    Returns
    -------
    pandas.Series
        One value per column label: the sum of the absolute off-diagonal
        entries of that column divided by ``len(corr_df) - 1``, sorted in
        ascending order.
    """
    # Work on an explicit, writable float copy. Filling the diagonal through
    # DataFrame.values is unreliable: the array can be read-only (pandas
    # Copy-on-Write) or a detached copy (mixed dtypes).
    values = corr_df.to_numpy(dtype=float, copy=True)
    # remove diagonal element
    np.fill_diagonal(values, 0)
    off_diagonal = pd.DataFrame(np.abs(values), index=corr_df.index, columns=corr_df.columns)
    # divide len-1 since we set 0 in diagonal
    mean_corr = off_diagonal.sum().divide(other=len(corr_df) - 1).sort_values()
    return mean_corr

In [30]:
mean_corr_classes(corr1['emodb'])

bored      0.029088
sad        0.031366
neutral    0.035680
angry      0.045253
disgust    0.047816
happy      0.048689
fear       0.070815
dtype: float64

In [31]:
mean_corr_classes(corr1['emovo'])

neutral      0.012365
surprised    0.018068
fear         0.023211
disgust      0.028441
happy        0.076473
angry        0.094422
sad          0.100509
dtype: float64

In [32]:
mean_corr_classes(corr1['ravdess'])

surprised    0.044283
disgust      0.104324
happy        0.104350
fear         0.126842
sad          0.199743
angry        0.264645
neutral      0.293718
calm         0.321703
dtype: float64

We want to have 5 labels for each dataset, so remove the most correlated label from each one and recompute the values.

In [33]:
def remove_labels(md, label):
    """Return a copy of ``md`` without the rows whose ``label`` equals ``label``.

    The remaining rows keep their original index labels; ``md`` itself is
    left untouched.
    """
    matches = (md.label == label).to_numpy()
    rows_to_drop = md.index[matches]
    return md.drop(index=rows_to_drop)

In [34]:
# drop from each dataset the label that ranked highest in the first-pass
# mean_corr_classes output: fear (emodb), sad (emovo), calm (ravdess)
rm1emodb = remove_labels(md['emodb'], 'fear')
rm1emovo = remove_labels(md['emovo'], 'sad')
rm1ravdess = remove_labels(md['ravdess'], 'calm')
# reduced metadata for the second pass, keyed by dataset name like md
md2 = {'emodb': rm1emodb,'emovo': rm1emovo,'ravdess': rm1ravdess}

In [35]:
rm1emovo

Unnamed: 0,path,filename,chunk,label,actor,gender
0,/datasets/nicolas_facchinetti/emovo/data/m3/gi...,gio-m3-l1.wav,0,happy,m3,m
1,/datasets/nicolas_facchinetti/emovo/data/m3/gi...,gio-m3-l1.wav,1,happy,m3,m
2,/datasets/nicolas_facchinetti/emovo/data/m3/gi...,gio-m3-b3.wav,0,happy,m3,m
3,/datasets/nicolas_facchinetti/emovo/data/m3/pa...,pau-m3-n5.wav,0,fear,m3,m
4,/datasets/nicolas_facchinetti/emovo/data/m3/pa...,pau-m3-n5.wav,1,fear,m3,m
...,...,...,...,...,...,...
757,/datasets/nicolas_facchinetti/emovo/data/m2/di...,dis-m2-b3.wav,0,disgust,m2,m
758,/datasets/nicolas_facchinetti/emovo/data/m2/so...,sor-m2-d1.wav,0,surprised,m2,m
759,/datasets/nicolas_facchinetti/emovo/data/m2/pa...,pau-m2-b3.wav,0,fear,m2,m
761,/datasets/nicolas_facchinetti/emovo/data/m2/ra...,rab-m2-l1.wav,0,angry,m2,m


In [36]:
rm1emovo

Unnamed: 0,path,filename,chunk,label,actor,gender
0,/datasets/nicolas_facchinetti/emovo/data/m3/gi...,gio-m3-l1.wav,0,happy,m3,m
1,/datasets/nicolas_facchinetti/emovo/data/m3/gi...,gio-m3-l1.wav,1,happy,m3,m
2,/datasets/nicolas_facchinetti/emovo/data/m3/gi...,gio-m3-b3.wav,0,happy,m3,m
3,/datasets/nicolas_facchinetti/emovo/data/m3/pa...,pau-m3-n5.wav,0,fear,m3,m
4,/datasets/nicolas_facchinetti/emovo/data/m3/pa...,pau-m3-n5.wav,1,fear,m3,m
...,...,...,...,...,...,...
757,/datasets/nicolas_facchinetti/emovo/data/m2/di...,dis-m2-b3.wav,0,disgust,m2,m
758,/datasets/nicolas_facchinetti/emovo/data/m2/so...,sor-m2-d1.wav,0,surprised,m2,m
759,/datasets/nicolas_facchinetti/emovo/data/m2/pa...,pau-m2-b3.wav,0,fear,m2,m
761,/datasets/nicolas_facchinetti/emovo/data/m2/ra...,rab-m2-l1.wav,0,angry,m2,m


Second pass on all three datasets

In [37]:
# second pass: same tables on the reduced metadata. pc[d] still contains the
# rows of the removed label; compute_corr only picks the rows whose index
# labels survive in md2[d], which works because remove_labels keeps the
# original index
corr2 = {}
for d in ds:
    corr2[d] = compute_corr(md2[d], pc[d])
    print(f'Done {d}')

Done emodb
Done emovo
Done ravdess


In [38]:
corr2['emodb']

Unnamed: 0,sad,neutral,happy,bored,angry,disgust
sad,0.077511,0.00449,-0.049251,0.018384,-0.053962,-0.02763
neutral,0.00449,0.086877,0.017955,0.062002,-0.007633,0.04256
happy,-0.049251,0.017955,0.083151,-0.001946,0.072037,0.055213
bored,0.018384,0.062002,-0.001946,0.06135,-0.02377,0.024621
angry,-0.053962,-0.007633,0.072037,-0.02377,0.083122,0.039777
disgust,-0.02763,0.04256,0.055213,0.024621,0.039777,0.073368


In [39]:
corr2['emovo']

Unnamed: 0,happy,fear,surprised,disgust,neutral,angry
happy,0.130296,-0.031298,0.025507,-0.039277,-0.013427,0.161991
fear,-0.031298,0.013431,-0.004932,0.009466,0.002195,-0.044654
surprised,0.025507,-0.004932,0.010721,-0.008186,-0.00344,0.029611
disgust,-0.039277,0.009466,-0.008186,0.017389,0.00359,-0.05361
neutral,-0.013427,0.002195,-0.00344,0.00359,0.013051,-0.02623
angry,0.161991,-0.044654,0.029611,-0.05361,-0.02623,0.233166


In [40]:
corr2['ravdess']

Unnamed: 0,angry,surprised,disgust,sad,happy,neutral,fear
angry,0.42707,0.054681,-0.142264,-0.299695,0.142771,-0.488983,0.175742
surprised,0.054681,0.019099,-0.024168,-0.039967,0.023697,-0.064334,0.026185
disgust,-0.142264,-0.024168,0.058756,0.101692,-0.053123,0.159442,-0.062729
sad,-0.299695,-0.039967,0.101692,0.214016,-0.102245,0.342471,-0.125532
happy,0.142771,0.023697,-0.053123,-0.102245,0.055722,-0.160648,0.060592
neutral,-0.488983,-0.064334,0.159442,0.342471,-0.160648,0.579079,-0.20575
fear,0.175742,0.026185,-0.062729,-0.125532,0.060592,-0.20575,0.078595


In [41]:
mean_corr_classes(corr2['emodb'])

bored      0.026145
neutral    0.026928
sad        0.030744
disgust    0.037960
happy      0.039280
angry      0.039436
dtype: float64

In [42]:
mean_corr_classes(corr2['emovo'])

neutral      0.009776
surprised    0.014335
fear         0.018509
disgust      0.022826
happy        0.054300
angry        0.063219
dtype: float64

In [43]:
mean_corr_classes(corr2['ravdess'])

surprised    0.038839
happy        0.090513
disgust      0.090570
fear         0.109422
sad          0.168600
angry        0.217356
neutral      0.236938
dtype: float64

Second removal and final pass, only on ravdess

In [45]:
# drop 'neutral', the highest-ranked label in the second-pass ravdess output,
# from the already reduced ravdess metadata and recompute the table
rm2ravdess = remove_labels(md2['ravdess'], 'neutral')
corr3 = compute_corr(rm2ravdess, pc['ravdess'])

In [46]:
corr3

Unnamed: 0,angry,surprised,disgust,sad,happy,fear
angry,0.42707,0.054681,-0.142264,-0.299695,0.142771,0.175742
surprised,0.054681,0.019099,-0.024168,-0.039967,0.023697,0.026185
disgust,-0.142264,-0.024168,0.058756,0.101692,-0.053123,-0.062729
sad,-0.299695,-0.039967,0.101692,0.214016,-0.102245,-0.125532
happy,0.142771,0.023697,-0.053123,-0.102245,0.055722,0.060592
fear,0.175742,0.026185,-0.062729,-0.125532,0.060592,0.078595


In [47]:
mean_corr_classes(corr3)

surprised    0.033739
happy        0.076486
disgust      0.076795
fear         0.090156
sad          0.133826
angry        0.163030
dtype: float64