In [2]:
import sys

sys.path.append("..")

from pathlib import Path
import pandas as pd

from ssl_agent.ssl_agent import SSLAgent
from lib.dataset_wrapper import Dataset
from lib import abx_utils
import torch
import numpy as np

# Ask PyTorch's caching allocator to release unused cached GPU memory before
# any agent is loaded.
torch.cuda.empty_cache()

# Sample count forwarded to abx_utils.get_abx_matrix for every ABX evaluation.
ABX_NB_SAMPLES = 500

# One single-entry mapping per agent feature type. Each entry is evaluated
# separately, using a cosine metric with unit weight.
AGENT_ABX_DISTANCES = [
    {feature_name: {"metric": "cosine", "weight": 1}}
    for feature_name in ("art_estimated", "feat_seq")
]


def compute_custom_ABX(agent_path, kind):
    """Compute per-speaker consonant ABX scores for one agent and cache them as CSV.

    For every distance in ``AGENT_ABX_DISTANCES`` and every speaker found in the
    agent's ``kind`` split, an ABX matrix is built over the consonants and
    reduced to three scores (place group, manner group, global). All scores are
    written as a single-row CSV at ``<agent_path>/results/test_ABX_<kind>.csv``
    with columns named ``<speaker>_<place|manner|global>_<distance_name>``.

    If that CSV already exists nothing is recomputed and "Already done" is
    printed instead.

    Args:
        agent_path: ``pathlib.Path`` of the saved agent directory (it is joined
            with ``/`` and passed to ``SSLAgent.reload`` as a string).
        kind: Either ``'heldout'`` or ``'test'``. It is used both as the
            ``Dataset`` name and as the datasplit key.

    Raises:
        AssertionError: If ``kind`` is not one of the two supported values.
    """
    assert kind in ['heldout', 'test'], f"unsupported kind: {kind!r}"
    save_path = agent_path / 'results' / f'test_ABX_{kind}.csv'
    save_path.parent.mkdir(exist_ok=True)

    # The CSV acts as a cache: skip the expensive agent reload when it exists.
    if save_path.is_file():
        print("Already done")
        return

    agent = SSLAgent.reload(str(agent_path))

    main_dataset = Dataset(kind)
    # Point the reloaded agent at the requested dataset before asking it for
    # its labels and features.
    agent.config['dataset']['name'] = kind
    agent_lab = agent.get_datasplit_lab()
    # Item names are expected to look like "<speaker>_<...>"; the speaker id is
    # the part before the first underscore.
    speakers = np.unique([e.split('_')[0] for e in agent_lab[kind].keys()])
    consonants = main_dataset.phones_infos["consonants"]
    vowels = main_dataset.phones_infos["vowels"]
    consonants_indexes = abx_utils.get_datasets_phones_indexes(agent_lab, consonants, vowels)
    agent_features = agent.repeat_datasplit()

    out = {}
    for distance in AGENT_ABX_DISTANCES:
        # Each distance is a single-entry dict; its only key names the feature.
        distance_name = list(distance.keys())[0]
        for speaker in speakers:
            speaker_consonants_indexes = {
                phone: [tup for tup in phone_indexes if tup[1].split('_')[0] == speaker]
                for phone, phone_indexes in consonants_indexes.items()
            }
            # Use the same exact speaker-id match as above. A prefix test
            # (startswith) would also pick up items of any speaker whose id
            # merely begins with this one (e.g. "F1" vs "F10").
            speaker_agent_features = {
                kind: {
                    feat_type: {
                        k: v for k, v in items_dict.items()
                        if k.split('_')[0] == speaker
                    }
                    for feat_type, items_dict in agent_features[kind].items()
                }
            }
            abx_matrix = abx_utils.get_abx_matrix(consonants, speaker_consonants_indexes,
                                                  speaker_agent_features,
                                                  distance, ABX_NB_SAMPLES, seed=0)
            groups_score = abx_utils.get_groups_score(consonants, abx_matrix,
                                                      main_dataset.phones_infos["consonant_groups"])
            global_score = abx_utils.get_global_score(abx_matrix)
            out.update({
                f'{speaker}_place_{distance_name}': groups_score['place'],
                f'{speaker}_manner_{distance_name}': groups_score['manner'],
                f'{speaker}_global_{distance_name}': global_score,
            })

    # Single-row frame: one column per (speaker, score type, distance).
    pd.DataFrame([out]).to_csv(save_path, index=False)

In [None]:
agent_folder = Path('../out/ssl_agent/cosine/8_speakers_6000_mn')

# Compute ABX scores for the cosine agents: for each seed, the MFCC agent first,
# then the 13 wav2vec 2.0 layer agents, each on the heldout and test splits.
for seed in range(5):
    seed_agent_names = [f'8_speakers_6000_mn_mfcc_delta_delta2_cosine_seed_{seed}']
    seed_agent_names += [
        f'8_speakers_6000_mn_layer_{layer}_cosine_seed_{seed}' for layer in range(13)
    ]
    for name in seed_agent_names:
        for split in ('heldout', 'test'):
            compute_custom_ABX(agent_folder / name, kind=split)
 

Removing weight norm...
Transposing features to get [T,N] vectors.


  return _VF.stft(input, n_fft, hop_length, win_length, window,  # type: ignore
  return _VF.stft(input, n_fft, hop_length, win_length, window,  # type: ignore


In [None]:

# Load MFCC agent
# baseline_cosine['mfcc_agent'][kind][column] is a list with one score per seed,
# where `column` is a column name of the cached ABX CSV.
baseline_cosine = {
    'mfcc_agent': {
        'heldout': {},
        'test': {}
    }
}

for seed in range(0, 5):
    agent_name = f'8_speakers_6000_mn_mfcc_delta_delta2_cosine_seed_{seed}'
    for kind in ['heldout', 'test']:
        # Read the CSV that matches the current split (the path used to be
        # hard-coded to the heldout file for both splits).
        agent_path = agent_folder / agent_name / 'results' / f'test_ABX_{kind}.csv'
        res = pd.read_csv(agent_path).to_dict('list')
        # The CSV holds a single row; keep that row's value for each column.
        res = {k: v[0] for k, v in res.items()}
        for k, v in res.items():
            # Accumulate across seeds inside the per-split dict. The old
            # membership test looked at the parent dict, so it never matched
            # and each seed overwrote the previous one.
            baseline_cosine['mfcc_agent'][kind].setdefault(k, []).append(v)
print(baseline_cosine)

# Load wav2vec 2.0 agent
# w2v_cosine['ssl_agent_layer_<i>'][kind][column] is a list with one score per
# seed, where `column` is a column name of the cached ABX CSV.
w2v_cosine = {
    f'ssl_agent_layer_{i}': {
        'heldout': {},
        'test': {}
    }
    for i in range(0, 13)
}

# Five seeds, matching the seeds used when the ABX CSVs were computed
# (the previous range(0, 0) never iterated).
for seed in range(0, 5):
    for i in range(0, 13):
        for kind in ['heldout', 'test']:
            agent_name = f'8_speakers_6000_mn_layer_{i}_cosine_seed_{seed}'
            agent_path = agent_folder / agent_name / 'results' / f'test_ABX_{kind}.csv'
            res = pd.read_csv(agent_path).to_dict('list')
            # The CSV holds a single row; keep that row's value for each column.
            res = {k: v[0] for k, v in res.items()}
            for k, v in res.items():
                # Accumulate across seeds inside the per-split dict rather than
                # testing membership against the per-layer dict.
                w2v_cosine[f'ssl_agent_layer_{i}'][kind].setdefault(k, []).append(v)