In [7]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
from random import seed, shuffle
import os

import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
sns.set_theme(style="white")

from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from sklearn.cluster import KMeans, SpectralClustering

from scipy import stats
from scipy.spatial.distance import directed_hausdorff, euclidean, cosine, pdist

from src.downsample import downsamp_audio
import src.dimension_reducer as dr
import src.distance_metrics as dm
import src.emb_manipulator as em

from IPython.display import clear_output

import warnings
warnings.filterwarnings('ignore')

# Directory where the already-generated embeddings are stored.
# Set the INFANTVOICE_EMBEDDING_DIR environment variable to run this notebook on
# another machine; when it is unset, the original author's local path is used.
embedding_dir = os.environ.get(
    'INFANTVOICE_EMBEDDING_DIR',
    '/Users/rahulbrito/Documents/projects/infantvoice/data/embeddings',
)

# Names of the pretrained embedding-extractor models. Any "all models" script
# evaluates which one(s) might be best.
emb_models = ['emb_ami', 'emb', 'emb_voxceleb']

# File-name prefixes found in embedding_dir:
#   020422_* : the recent recordings of actual moms that Sohye sent
#   020322_* : all the test recordings from last year
# The directory holds other CSVs as well, so the file to analyse is named explicitly.
# History: earlier runs selected every file starting with "020422", or used
# '022822_postpartum_moms_20part_emb_ami.csv', and built a dict with one DataFrame
# per entry of emb_models. 'emb_ami' was preferred then because it seemed to best
# preserve the high-dimensional structure after dimension reduction.
# NOTE(review): it is not visible here which model produced the file below - confirm.
file = '03016_diarized_pyv2.csv'

# Embeddings table; the first CSV column becomes the index.
emb = pd.read_csv(os.path.join(embedding_dir, file), index_col=0)

# Resample the embeddings with the project helper.
# NOTE(review): the meaning of the second argument (1) is defined in
# src.emb_manipulator.resample_data - confirm before changing it.
emb_down = em.resample_data(emb, 1)

# Average embedding for each participant.
emb_a = em.embedding_averager(emb_down)

# eGeMAPS features (88 features per the original note) for each participant.
# NOTE(review): the file name has no extension - confirm it matches the file on disk.
gemap_file = '020422_postpartum_moms_gemaps'
gemap = (
    pd.read_csv(os.path.join(embedding_dir, gemap_file), index_col=0)
    .drop(columns=['start', 'end'])  # segment boundary columns are not used as features
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Average the downsampled embeddings (emb_down is created in the setup cell).
# NOTE(review): this repeats the em.embedding_averager(emb_down) call that already
# produces `emb_a` in the setup cell - consider reusing a single name.
avg_emb = em.embedding_averager(emb_down)
# Cosine distances computed from the averaged embeddings via src.distance_metrics.
# NOTE(review): presumably a pairwise distance matrix in the original embedding
# space (hence "high_dim") - confirm against dm.cos_distance.
cos_dist_high_dim = dm.cos_distance(avg_emb)

Processing row 19, col 19


In [8]:
# Display the eGeMAPS feature table loaded in the setup cell ('start'/'end' columns already dropped).
gemap

Unnamed: 0_level_0,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
/Users/rahulbrito/Documents/projects/infantvoice/data/Full_Readings/16khz_020422_postpartum_moms/014.wav,34.656258,0.226665,29.325533,32.33963,37.971905,8.646372,417.31317,756.0397,176.3497,421.68176,...,0.034896,0.025749,0.027282,1.884727,1.676721,0.197657,0.175015,0.396689,1.097285,-29.941727
/Users/rahulbrito/Documents/projects/infantvoice/data/Full_Readings/16khz_020422_postpartum_moms/001.wav,40.614002,0.19399,33.898705,38.412937,48.591347,14.692642,259.02628,344.9783,189.37656,349.84848,...,-0.009542,0.015704,0.04161,2.023577,2.042703,0.268282,0.310515,0.240248,0.514109,-27.318361
/Users/rahulbrito/Documents/projects/infantvoice/data/Full_Readings/16khz_020422_postpartum_moms/015.wav,40.012825,0.161124,34.83711,39.58894,45.550262,10.713154,231.9797,359.49014,140.45279,287.63593,...,0.061847,0.007072,0.080939,2.109048,1.968953,0.266091,0.315487,0.251745,1.239365,-26.354046
/Users/rahulbrito/Documents/projects/infantvoice/data/Full_Readings/16khz_020422_postpartum_moms/003.wav,38.13042,0.165808,32.843327,36.665283,42.95469,10.111362,212.71884,369.81903,187.45552,309.13373,...,-0.012003,0.012537,0.087544,1.871052,2.005616,0.20984,0.224457,0.282699,0.540035,-21.986076
/Users/rahulbrito/Documents/projects/infantvoice/data/Full_Readings/16khz_020422_postpartum_moms/017.wav,34.798317,0.137045,32.089806,34.590267,37.231243,5.141438,156.26746,403.00116,100.74687,201.76405,...,0.049451,0.016761,0.035364,1.985528,1.621728,0.284073,0.266139,0.3306,0.742869,-26.701868
/Users/rahulbrito/Documents/projects/infantvoice/data/Full_Readings/16khz_020422_postpartum_moms/016.wav,36.757298,0.20771,30.19368,35.667973,42.06424,11.87056,241.57968,521.6707,161.91745,300.65695,...,0.043272,0.018943,0.033887,2.033679,1.692516,0.237146,0.236825,0.363136,1.277003,-25.765015
/Users/rahulbrito/Documents/projects/infantvoice/data/Full_Readings/16khz_020422_postpartum_moms/002.wav,41.346184,0.193928,33.896748,40.29146,48.58111,14.684361,242.58864,363.40833,193.45422,334.92056,...,-0.007387,0.010263,0.047721,1.986158,1.840942,0.17713,0.202996,0.30797,0.498998,-29.922302
/Users/rahulbrito/Documents/projects/infantvoice/data/Full_Readings/16khz_020422_postpartum_moms/006.wav,39.151253,0.200883,33.308697,36.586136,45.129265,11.820568,292.00543,477.85782,190.17604,320.52896,...,-0.012655,0.019526,0.041911,2.212083,2.067429,0.194195,0.175429,0.291202,0.530252,-29.348478
/Users/rahulbrito/Documents/projects/infantvoice/data/Full_Readings/16khz_020422_postpartum_moms/012.wav,37.890427,0.199913,32.722797,36.46066,42.930946,10.208149,339.9228,572.84937,182.87581,330.6992,...,0.048792,0.020148,0.04941,2.06064,1.660605,0.18336,0.204085,0.37794,1.063458,-25.793747
/Users/rahulbrito/Documents/projects/infantvoice/data/Full_Readings/16khz_020422_postpartum_moms/013.wav,37.759846,0.185269,32.59181,36.301266,42.268417,9.676609,255.28006,503.61493,163.4135,325.6412,...,0.039742,0.023627,0.053206,2.301372,1.791822,0.221621,0.218884,0.340325,1.028085,-26.40707
