Skip to content

Commit

Permalink
0.6.4 (#17)
Browse files Browse the repository at this point in the history
  • Loading branch information
mmcauliffe committed Mar 22, 2024
1 parent ac29bcf commit 8b2337e
Show file tree
Hide file tree
Showing 5 changed files with 177 additions and 64 deletions.
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ target_link_libraries(_kalpy PUBLIC kaldi-base kaldi-chain
fstscript
)

if(CUDAToolkit_FOUND)
find_library(KALDI_CUDADECODER kaldi-cudadecoder)

if(CUDAToolkit_FOUND AND KALDI_CUDADECODER)

target_link_libraries(_kalpy PUBLIC kaldi-cudadecoder kaldi-cudafeat
)
Expand Down
138 changes: 101 additions & 37 deletions extensions/ivector/ivector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,29 @@ void pybind_plda(py::module &m) {
py::arg("utterance_ivector"),
py::arg("transformed_enrolled_ivectors"),
py::arg("num_enroll_utts"))
// Score one utterance i-vector against a list of already-PLDA-transformed
// enrollment i-vectors; returns one log-likelihood ratio per enrollment vector.
.def("score",
[](
PyClass &plda,
const VectorBase<float> & utterance_ivector,
const std::vector<Vector<float>> &transformed_enrolled_ivectors
){
// Pure C++ loop over potentially many enrollment vectors: release the GIL.
py::gil_scoped_release gil_release;
// NOTE(review): plda_config is constructed but never used — confirm it can be removed.
PldaConfig plda_config;
// Promote the float utterance i-vector to double, as LogLikelihoodRatio expects.
Vector<double> ivector_one_dbl(utterance_ivector);

std::vector<BaseFloat> scores;

for (int32 j = 0; j < transformed_enrolled_ivectors.size(); j++) {
Vector<double> ivector_two_dbl(transformed_enrolled_ivectors[j]);
// num_utts is hard-coded to 1: each enrolled vector is treated as a
// single-utterance enrollment.
scores.push_back(plda.LogLikelihoodRatio(ivector_one_dbl,
1,
ivector_two_dbl));
}
return scores;

},
py::arg("utterance_ivector"),
py::arg("transformed_enrolled_ivectors"))
.def("log_likelihood_distance",
[](
PyClass &plda,
Expand Down Expand Up @@ -912,47 +935,24 @@ void pybind_plda(py::module &m) {
py::buffer_info buf3 = result.request();
double *ptr3 = static_cast<double *>(buf3.ptr);
for (py::size_t i = 0; i < r_one.shape(0); i++){
Vector<double> ivector_one_dbl;
ivector_one_dbl.Resize(r_one.shape(1));
Vector<double> ivector_two_dbl;
ivector_two_dbl.Resize(r_two.shape(1));
for (py::size_t j = 0; j < r_one.shape(1); j++){
ivector_one_dbl(j) = r_one(i, j);
ivector_two_dbl(j) = r_two(i, j);

}
ptr3[i] = 1.0 / Exp(plda.LogLikelihoodRatio(ivector_one_dbl,
1,
ivector_two_dbl));;
Vector<double> ivector_one_dbl;
ivector_one_dbl.Resize(r_one.shape(1));
Vector<double> ivector_two_dbl;
ivector_two_dbl.Resize(r_two.shape(1));
for (py::size_t j = 0; j < r_one.shape(1); j++){
ivector_one_dbl(j) = r_one(i, j);
ivector_two_dbl(j) = r_two(i, j);

}
ptr3[i] = 1.0 / Exp(plda.LogLikelihoodRatio(ivector_one_dbl,
1,
ivector_two_dbl));;

}
return result;
},
py::arg("utterance_one_ivector"),
py::arg("utterance_two_ivector"))
.def("score",
[](
PyClass &plda,
const VectorBase<float> & utterance_ivector,
const std::vector<Vector<float>> &transformed_enrolled_ivectors
){
py::gil_scoped_release gil_release;
PldaConfig plda_config;
Vector<double> ivector_one_dbl(utterance_ivector);

std::vector<BaseFloat> scores;

for (int32 j = 0; j < transformed_enrolled_ivectors.size(); j++) {
Vector<double> ivector_two_dbl(transformed_enrolled_ivectors[j]);
scores.push_back(plda.LogLikelihoodRatio(ivector_one_dbl,
1,
ivector_two_dbl));
}
return scores;

},
py::arg("utterance_ivector"),
py::arg("transformed_enrolled_ivectors"))
.def(py::pickle(
[](const PyClass &p) { // __getstate__
/* Return a tuple that fully encodes the state of the object */
Expand Down Expand Up @@ -1074,6 +1074,7 @@ void pybind_plda(py::module &m) {
py::array_t<double> & transformed_test_ivector
){
py::gil_scoped_release gil_release;

Vector<double> ivector_one_dbl;
auto r1 = transformed_enroll_ivector.unchecked<1>();
ivector_one_dbl.Resize(r1.shape(0));
Expand All @@ -1099,6 +1100,35 @@ void pybind_plda(py::module &m) {
py::arg("transformed_enroll_ivector"),
py::arg("num_enroll_utts"),
py::arg("transformed_test_ivector"))

// NOTE(review): this registers a second method named "log_likelihood_ratio";
// pybind11 tries overloads in registration order — confirm the duplicate name
// (see the non-vectorized def ending just above) is intentional.
.def("log_likelihood_ratio",
// NOTE(review): py::vectorize is documented for callables with SCALAR
// parameters; this lambda itself takes py::array_t arguments. Verify this
// broadcasts as intended rather than wrapping an already-array-based callable.
py::vectorize([](

PyClass &plda,
py::array_t<double> & transformed_enroll_ivector,
int32 num_enroll_utts,
py::array_t<double> & transformed_test_ivector
){
// NOTE(review): releasing the GIL inside a py::vectorize body while still
// touching py::array_t accessors — confirm this is safe in this binding.
py::gil_scoped_release gil_release;
// Copy the 1-D enroll array into a Kaldi double vector.
Vector<double> ivector_one_dbl;
auto r1 = transformed_enroll_ivector.unchecked<1>();
ivector_one_dbl.Resize(r1.shape(0));
for (py::size_t i = 0; i < r1.shape(0); i++)
ivector_one_dbl(i) = r1(i);

// Copy the 1-D test array into a Kaldi double vector.
Vector<double> ivector_two_dbl;
auto r2 = transformed_test_ivector.unchecked<1>();
ivector_two_dbl.Resize(r2.shape(0));
for (py::size_t i = 0; i < r2.shape(0); i++)
ivector_two_dbl(i) = r2(i);

return plda.LogLikelihoodRatio(ivector_one_dbl, num_enroll_utts, ivector_two_dbl);

}),
"Numpy vectorized function for log-likelihood ratio.",
py::arg("transformed_enroll_ivector"),
py::arg("num_enroll_utts"),
py::arg("transformed_test_ivector"))
.def("SmoothWithinClassCovariance",
&PyClass::SmoothWithinClassCovariance,
"This function smooths the within-class covariance by adding to it, "
Expand Down Expand Up @@ -1427,7 +1457,8 @@ void init_ivector(py::module &_m) {

m.def("ivector_subtract_mean",
[](
std::vector<Vector<float>*> &ivectors
std::vector<Vector<float>*> &ivectors,
bool normalize = true
) {
py::gil_scoped_release gil_release;
Vector<double> sum;
Expand All @@ -1439,7 +1470,40 @@ void init_ivector(py::module &_m) {
for (size_t i = 0; i < ivectors.size(); i++) {
Vector<BaseFloat> *ivector = ivectors[i];
ivector->AddVec(-1.0 / ivectors.size(), sum);
if (normalize){
double norm = ivector->Norm(2.0);
double ratio = norm / sqrt(ivector->Dim());
ivector->Scale(1.0 / ratio);

}
}
},
py::arg("ivectors"));
py::arg("ivectors"),
py::arg("normalize") = true);

// Subtract the global mean from each i-vector in place and, when `normalize`
// is true, length-normalize each result so its 2-norm equals sqrt(dim)
// (the usual Kaldi i-vector length-normalization convention).
m.def("ivector_subtract_mean",
[](
std::vector<Vector<double>*> &ivectors,
bool normalize = true
) {
// Mutates the caller-owned vectors; no Python objects touched, so drop the GIL.
py::gil_scoped_release gil_release;
if (ivectors.empty()) return;  // nothing to do on an empty set
Vector<double> sum;

// Accumulate the sum of all i-vectors (dimension taken from the first one).
for (size_t i = 0; i < ivectors.size(); i++) {
if (sum.Dim() == 0) sum.Resize(ivectors[i]->Dim());
sum.AddVec(1.0, *ivectors[i]);
}
for (size_t i = 0; i < ivectors.size(); i++) {
Vector<double> *ivector = ivectors[i];
// Subtract the mean: sum / N.
ivector->AddVec(-1.0 / ivectors.size(), sum);
if (normalize){
double norm = ivector->Norm(2.0);
// Guard against an all-zero vector: the original scaled by 1.0 / (norm /
// sqrt(dim)), which divides by zero and fills the vector with inf/NaN.
if (norm > 0.0) {
double ratio = norm / sqrt(ivector->Dim());
ivector->Scale(1.0 / ratio);
}
}
}
},
py::arg("ivectors"),
py::arg("normalize") = true);
}
22 changes: 21 additions & 1 deletion kalpy/feat/pitch.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,11 +271,31 @@ def compute_pitch_for_export(
if len(wave.shape) == 2:
channel = 0 if segment.channel is None else segment.channel
wave = wave[channel, :]
pitch = feat.compute_pitch(wave, self.extraction_opts, self.process_opts)
pitch = self.compute_pitch_for_wave(wave)
if compress:
pitch = CompressedMatrix(pitch)
return pitch

def compute_pitch_for_wave(
    self,
    wave: np.ndarray,
) -> FloatMatrixBase:
    """
    Compute pitch features for a single waveform array.

    Parameters
    ----------
    wave: :class:`~numpy.ndarray`
        Waveform samples (a single channel)

    Returns
    -------
    :class:`_kalpy.matrix.FloatMatrixBase`
        Pitch feature matrix for the waveform
    """
    # Extraction and post-processing options come from this object's configuration.
    pitch = feat.compute_pitch(wave, self.extraction_opts, self.process_opts)
    return pitch

def export_feats(
self,
file_name: typing.Union[pathlib.Path, str],
Expand Down
18 changes: 9 additions & 9 deletions kalpy/ivector/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
import os
import typing

from _kalpy.matrix import FloatVector
from _kalpy.matrix import DoubleVector
from _kalpy.util import (
RandomAccessBaseFloatVectorReader,
RandomAccessBaseDoubleVectorReader,
RandomAccessInt32VectorVectorReader,
SequentialBaseFloatVectorReader,
SequentialBaseDoubleVectorReader,
SequentialInt32VectorVectorReader,
)
from kalpy.data import PathLike
Expand All @@ -31,7 +31,7 @@ def __init__(self, file_name: PathLike, num_utterances_file_name: PathLike = Non
self.file_name = str(file_name)
self.num_utterances_file_name = num_utterances_file_name
self.read_specifier = generate_read_specifier(file_name)
self.random_reader = RandomAccessBaseFloatVectorReader(self.read_specifier)
self.random_reader = RandomAccessBaseDoubleVectorReader(self.read_specifier)
self.num_utterances_mapping = {}
if self.num_utterances_file_name is not None:
with open(self.num_utterances_file_name) as f:
Expand All @@ -45,18 +45,18 @@ def close(self):
self.random_reader.Close()

@property
def sequential_reader(self) -> SequentialBaseFloatVectorReader:
def sequential_reader(self) -> SequentialBaseDoubleVectorReader:
"""Sequential reader for lattices"""
return SequentialBaseFloatVectorReader(self.read_specifier)
return SequentialBaseDoubleVectorReader(self.read_specifier)

def __iter__(self) -> typing.Generator[typing.Tuple[str, FloatVector]]:
def __iter__(self) -> typing.Generator[typing.Tuple[str, DoubleVector]]:
"""Iterate over the utterance lattices in the archive"""
if self.read_specifier.startswith("scp"):
with open(self.file_name, encoding="utf8") as f:
for line in f:
line = line.strip()
key, ark_path = line.split(maxsplit=1)
ivector = read_kaldi_object(FloatVector, ark_path)
ivector = read_kaldi_object(DoubleVector, ark_path)
num_utterances = self.num_utterances_mapping.get(key, 1)
yield key, ivector, num_utterances
else:
Expand All @@ -74,7 +74,7 @@ def __iter__(self) -> typing.Generator[typing.Tuple[str, FloatVector]]:
def __del__(self):
self.close()

def __getitem__(self, item: str) -> FloatVector:
def __getitem__(self, item: str) -> DoubleVector:
"""Get lattice for a particular key from the archive file"""
item = str(item)
if not self.random_reader.HasKey(item):
Expand Down
59 changes: 43 additions & 16 deletions kalpy/ivector/plda.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@

import numpy as np

from _kalpy.ivector import Plda
from _kalpy.ivector import Plda, ivector_normalize_length, ivector_subtract_mean
from _kalpy.matrix import DoubleVector, FloatVector
from kalpy.utils import read_kaldi_object
from kalpy.ivector.data import IvectorArchive


class PldaScorer:
Expand All @@ -16,31 +17,57 @@ def __init__(
simple_length_norm: bool = True,
):
self.plda_path = str(plda_path)
self.plda = read_kaldi_object(Plda, self.plda_path)
self.plda: Plda = read_kaldi_object(Plda, self.plda_path)
self.normalize_length = normalize_length
self.simple_length_norm = simple_length_norm
self.speaker_ids = None
self.speaker_ivectors = None
self.num_speaker_examples = None

def load_speaker_ivectors(self, speaker_archive_path, num_utts_path=None):
    """
    Load per-speaker i-vectors from a Kaldi archive and cache the
    PLDA-transformed vectors (plus speaker ids and utterance counts) on this
    scorer for later classification.

    Parameters
    ----------
    speaker_archive_path:
        Path to the speaker i-vector archive.
    num_utts_path:
        Optional path to a file mapping each speaker to its utterance count;
        speakers missing from it default to 1 utterance.
    """
    ivector_archive = IvectorArchive(
        speaker_archive_path, num_utterances_file_name=num_utts_path
    )
    speaker_ivectors = []
    self.speaker_ids = []
    self.num_speaker_examples = []
    for speaker_id, ivector, utts in ivector_archive:
        self.speaker_ids.append(speaker_id)
        self.num_speaker_examples.append(utts)
        # Optional per-vector length normalization before mean subtraction.
        if self.normalize_length:
            ivector_normalize_length(ivector)
        speaker_ivectors.append(DoubleVector(ivector))
    # Global mean subtraction (with re-normalization when configured) is done
    # across the whole speaker set before applying the PLDA transform.
    ivector_subtract_mean(speaker_ivectors,normalize=self.normalize_length)
    self.speaker_ivectors = self.plda.transform_ivectors(speaker_ivectors, self.num_speaker_examples)

def transform_ivector(self, ivector: np.ndarray, num_examples: int = 1):
    """Apply the PLDA preprocessing transform to a single i-vector."""
    return self.plda.transform_ivector(ivector, num_examples)

def transform_ivectors(self, ivectors: np.ndarray, num_examples: np.ndarray = None):
    """Apply the PLDA preprocessing transform to a batch of i-vectors.

    When ``num_examples`` is omitted, each i-vector is assumed to come from a
    single utterance.
    """
    if num_examples is None:
        num_examples = np.ones((ivectors.shape[0]))
    return self.plda.transform_ivectors(ivectors, num_examples)

def score_ivectors(
    self,
    speaker_ivector: typing.Union[np.ndarray, FloatVector, DoubleVector],
    utterance_ivector: typing.Union[np.ndarray, FloatVector, DoubleVector],
    num_speaker_examples: int = 1,
):
    """
    Compute the PLDA log-likelihood ratio between a speaker (enrollment)
    i-vector and an utterance i-vector.

    Parameters
    ----------
    speaker_ivector:
        Enrollment i-vector; numpy arrays and FloatVectors are converted
        to DoubleVector.
    utterance_ivector:
        Utterance i-vector; converted the same way.
    num_speaker_examples:
        Number of utterances the speaker i-vector was averaged over.

    Returns
    -------
    Log-likelihood ratio score (higher means more likely the same speaker).
    """
    # Coerce the speaker i-vector to DoubleVector, the type the PLDA binding expects.
    if isinstance(speaker_ivector, np.ndarray):
        v = DoubleVector()
        v.from_numpy(speaker_ivector)
        speaker_ivector = v
    elif isinstance(speaker_ivector, FloatVector):
        speaker_ivector = DoubleVector(speaker_ivector)

    # Same coercion for the utterance i-vector.
    if isinstance(utterance_ivector, np.ndarray):
        v = DoubleVector()
        v.from_numpy(utterance_ivector)
        utterance_ivector = v
    elif isinstance(utterance_ivector, FloatVector):
        utterance_ivector = DoubleVector(utterance_ivector)

    score = self.plda.LogLikelihoodRatio(
        speaker_ivector, num_speaker_examples, utterance_ivector
    )
    return score

def classify_speaker(
    self,
    utterance_ivector: typing.Union[np.ndarray, FloatVector, DoubleVector],
):
    """
    Classify an utterance i-vector against the loaded speaker i-vectors.

    Requires :meth:`load_speaker_ivectors` (or equivalent manual setup of
    ``speaker_ivectors``/``speaker_ids``) to have been called first.

    Parameters
    ----------
    utterance_ivector:
        Utterance i-vector; numpy arrays and FloatVectors are converted to
        DoubleVector before scoring.

    Returns
    -------
    Tuple of (speaker id of the best match, its PLDA score).
    """
    if self.num_speaker_examples is None:
        # Default to one utterance per enrolled speaker.
        # BUG FIX: iterate over range(n); the original iterated the bare int
        # ``self.speaker_ivectors.shape[0]`` which raises TypeError.
        self.num_speaker_examples = [1 for _ in range(self.speaker_ivectors.shape[0])]
    if isinstance(utterance_ivector, np.ndarray):
        # BUG FIX: the original rebound ``utterance_ivector`` to an empty
        # DoubleVector before calling from_numpy, so the conversion read from
        # the empty vector itself instead of the input array.
        v = DoubleVector()
        v.from_numpy(utterance_ivector)
        utterance_ivector = v
    elif isinstance(utterance_ivector, FloatVector):
        # Consistency with score_ivectors: accept FloatVector inputs as well.
        utterance_ivector = DoubleVector(utterance_ivector)
    ind, score = self.plda.classify_utterance(utterance_ivector, self.speaker_ivectors, self.num_speaker_examples)
    speaker = self.speaker_ids[ind]
    return speaker, score

0 comments on commit 8b2337e

Please sign in to comment.