# I Vector

In [1]:
import os
import numpy as np
from glob import glob
from multiprocessing import cpu_count
from glob import glob
import warnings
warnings.filterwarnings("ignore")
import logging
logging.basicConfig(level=logging.INFO)

import sidekit
from model_interface import SidekitModel
from ubm import UBM

from utils import parse_yaml



In [2]:
conf_path = "conf.yaml"

conf = parse_yaml(conf_path)
NUM_GAUSSIANS = conf['num_gaussians']
BATCH_SIZE = conf['batch_size']
TV_RANK = conf['tv_rank']
TV_ITERATIONS = conf['tv_iterations']
ENABLE_PLDA = conf['enable_plda']
BASE_DIR = conf['outpath']

print("conf_path: ", conf_path)
print("NUM_GAUSSIANS: ", NUM_GAUSSIANS)
print("BATCH_SIZE: ", BATCH_SIZE)
print("TV_RANK: ", TV_RANK)
print("TV_ITERATIONS: ", TV_ITERATIONS)
print("ENABLE_PLDA: ", ENABLE_PLDA)
print("BASE_DIR: ", BASE_DIR)

conf_path:  conf.yaml
NUM_GAUSSIANS:  64
BATCH_SIZE:  30
TV_RANK:  25
TV_ITERATIONS:  50
ENABLE_PLDA:  none
BASE_DIR:  C:/Users/sean/han_ma_eum/Speaker-Recognition/outputs


In [3]:
# sidekitmodel = SidekitModel(conf_path)


"""This private method is used to create Statistic Servers.TODO: post some more info"""

# Read tv_idmap
tv_idmap = sidekit.IdMap.read(os.path.join(BASE_DIR, "task", "tv_idmap.h5"))
back_idmap = tv_idmap
# If PLDA is enabled
if ENABLE_PLDA:
    # Read plda_idmap
    plda_idmap = sidekit.IdMap.read(os.path.join(BASE_DIR, "task", "plda_idmap.h5"))
    # Create a joint StatServer for TV and PLDA training data
    back_idmap = plda_idmap.merge(tv_idmap)
    if not back_idmap.validate():
        raise RuntimeError("Error merging tv_idmap & plda_idmap")

# Check UBM model
ubm_name = "ubm_{}.h5".format(NUM_GAUSSIANS)
ubm_path = os.path.join(BASE_DIR, "ubm", ubm_name)
print("ubm_name: ", ubm_name)
print("ubm_path: ", ubm_path)
if not os.path.exists(ubm_path):
    print("train UBM...")
    #if UBM model does not exist, train one
    logging.info("Training UBM-{} model".format(NUM_GAUSSIANS))
    ubm = UBM(conf_path)
    ubm.train()
#load trained UBM model
logging.info("Loading trained UBM-{} model".format(NUM_GAUSSIANS))
ubm = sidekit.Mixture()
ubm.read(ubm_path)
back_stat = sidekit.StatServer( statserver_file_name=back_idmap, 
                                ubm=ubm
                                )
# Create Feature Server
fs = SidekitModel(conf_path).createFeatureServer()

# Jointly compute the sufficient statistics of TV and (if enabled) PLDA data
back_filename = 'back_stat_{}.h5'.format(NUM_GAUSSIANS)
if not os.path.isfile(os.path.join(BASE_DIR, "stat", back_filename)):
    #BUG: don't use NUM_THREADS when assgining num_thread
    # as it's prune to race-conditioning
    back_stat.accumulate_stat(
        ubm=ubm,
        feature_server=fs,
        seg_indices=range(back_stat.segset.shape[0])
        )
    back_stat.write(os.path.join(BASE_DIR, "stat", back_filename))

# Load the sufficient statistics from TV training data
tv_filename = 'tv_stat_{}.h5'.format(NUM_GAUSSIANS)
if not os.path.isfile(os.path.join(BASE_DIR, "stat", tv_filename)):
    tv_stat = sidekit.StatServer.read_subset(
        os.path.join(BASE_DIR, "stat", back_filename),
        tv_idmap
        )
    tv_stat.write(os.path.join(BASE_DIR, "stat", tv_filename))

# Load sufficient statistics and extract i-vectors from PLDA training data
if ENABLE_PLDA:
    plda_filename = 'plda_stat_{}.h5'.format(NUM_GAUSSIANS)
    if not os.path.isfile(os.path.join(BASE_DIR, "stat", plda_filename)):
        plda_stat = sidekit.StatServer.read_subset(
            os.path.join(BASE_DIR, "stat", back_filename),
            plda_idmap
            )
        plda_stat.write(os.path.join(BASE_DIR, "stat", plda_filename))

# Load sufficient statistics from test data
filename = 'test_stat_{}.h5'.format(NUM_GAUSSIANS)
if not os.path.isfile(os.path.join(BASE_DIR, "stat", filename)):
    test_idmap = sidekit.IdMap.read(os.path.join(BASE_DIR, "task", "test_idmap.h5"))
    test_stat = sidekit.StatServer( statserver_file_name=test_idmap, 
                                    ubm=ubm
                                    )
    # Create Feature Server
    fs = SidekitModel(conf_path).createFeatureServer()
    # Jointly compute the sufficient statistics of TV and PLDA data
    #BUG: don't use NUM_THREADS when assgining num_thread as it's prune to race-conditioning
    test_stat.accumulate_stat(ubm=ubm,
                            feature_server=fs,
                            seg_indices=range(test_stat.segset.shape[0])
                            )
    test_stat.write(os.path.join(BASE_DIR, "stat", filename))

INFO:root:Loading trained UBM-64 model
INFO:root:Feature-Server is created


ubm_name:  ubm_64.h5
ubm_path:  C:/Users/sean/han_ma_eum/Speaker-Recognition/outputs\ubm\ubm_64.h5


In [4]:
# Load UBM model
model_name = "ubm_{}.h5".format(NUM_GAUSSIANS)
ubm = sidekit.Mixture()
ubm.read(os.path.join(BASE_DIR, "ubm", model_name))

# Load TV matrix
filename = "tv_matrix_{}".format(NUM_GAUSSIANS)
outputPath = os.path.join(BASE_DIR, "ivector", filename)
fa = sidekit.FactorAnalyser(outputPath+".h5")

In [5]:
# Extract i-vectors from enrollment data
logging.info("Extracting i-vectors from enrollment data")
filename = 'enroll_stat_{}.h5'.format(NUM_GAUSSIANS)
enroll_stat = sidekit.StatServer.read(os.path.join(BASE_DIR, 'stat', filename))
enroll_iv = fa.extract_ivectors_single( ubm=ubm,
                                        stat_server=enroll_stat,
                                        uncertainty=False
                                        )

INFO:root:Extracting i-vectors from enrollment data
Processing: 100%|██████████| 300/300 [00:00<00:00, 913.52it/s]


In [6]:
enroll_iv.modelset

array(['S01', 'S01', 'S01', 'S01', 'S01', 'S01', 'S02', 'S02', 'S02',
       'S02', 'S02', 'S02', 'S03', 'S03', 'S03', 'S03', 'S03', 'S03',
       'S04', 'S04', 'S04', 'S04', 'S04', 'S04', 'S05', 'S05', 'S05',
       'S05', 'S05', 'S05', 'S06', 'S06', 'S06', 'S06', 'S06', 'S06',
       'S07', 'S07', 'S07', 'S07', 'S07', 'S07', 'S08', 'S08', 'S08',
       'S08', 'S08', 'S08', 'S09', 'S09', 'S09', 'S09', 'S09', 'S09',
       'S10', 'S10', 'S10', 'S10', 'S10', 'S10', 'S11', 'S11', 'S11',
       'S11', 'S11', 'S11', 'S12', 'S12', 'S12', 'S12', 'S12', 'S12',
       'S13', 'S13', 'S13', 'S13', 'S13', 'S13', 'S14', 'S14', 'S14',
       'S14', 'S14', 'S14', 'S15', 'S15', 'S15', 'S15', 'S15', 'S15',
       'S16', 'S16', 'S16', 'S16', 'S16', 'S16', 'S17', 'S17', 'S17',
       'S17', 'S17', 'S17', 'S18', 'S18', 'S18', 'S18', 'S18', 'S18',
       'S19', 'S19', 'S19', 'S19', 'S19', 'S19', 'S20', 'S20', 'S20',
       'S20', 'S20', 'S20', 'S21', 'S21', 'S21', 'S21', 'S21', 'S21',
       'S22', 'S22',

In [7]:
enroll_iv.stat1

array([[-1.5311496 , -1.28135981,  1.32816947, ..., -0.64532526,
        -0.79574252,  1.09020777],
       [-1.05740162,  0.03279953, -0.32894721, ...,  0.08196629,
        -0.12916289,  0.34466696],
       [-0.75960468, -0.4688972 ,  0.80197035, ..., -0.49879355,
        -0.42238866,  0.88783695],
       ...,
       [ 0.68805278, -0.47003264, -0.76090148, ..., -0.44404721,
         0.67389389, -0.05768816],
       [-0.62228028,  0.57637023, -0.75697214, ..., -0.60348531,
         1.18879769,  2.11542062],
       [-0.18334221, -0.57890583, -0.89771113, ..., -0.53259158,
        -0.22025385,  0.39816732]])

In [8]:
idx = 0

print(enroll_iv.modelset[idx])

print(enroll_iv.stat1[idx])

print(len(enroll_iv.stat1[idx]))

S01
[-1.5311496  -1.28135981  1.32816947 -0.44887346  0.33056517 -0.06001847
  0.10624327 -0.2176878  -0.17061755 -0.11006908  2.44589498 -0.94355654
 -1.27974243  0.38651779  0.70874621  0.70706126 -0.98676391 -0.40480981
 -0.65365334 -0.06302094  0.59537732  0.49317728 -0.64532526 -0.79574252
  1.09020777]
25
